<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>决策树总结 | Hexo</title>
  <meta name="keywords" content=" 50 ">
  <meta name="description" content="决策树总结 | Hexo">
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="description" content="第二章 线性表线性表：表内数据类型相同，有限序列 本章将以总结的形式展现： 2.1 顺序表与链式表的区别     顺序表 链式表     存取 随机存取 顺序存取   结构 顺序存储（连续） 随机存储（不连续）   空间分配 静态存储（可以动态分配） 动态存储   操作 查找 O(1) ,插入和删除O（n） 查找 O(n) ,插入和删除O（1）   缺点 插入删除不便，长度不可以改变 查找速度慢，">
<meta property="og:type" content="article">
<meta property="og:title" content="数据结构">
<meta property="og:url" content="http://yoursite.com/2021/03/12/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84/index.html">
<meta property="og:site_name" content="Hexo">
<meta property="og:description" content="第二章 线性表线性表：表内数据类型相同，有限序列 本章将以总结的形式展现： 2.1 顺序表与链式表的区别     顺序表 链式表     存取 随机存取 顺序存取   结构 顺序存储（连续） 随机存储（不连续）   空间分配 静态存储（可以动态分配） 动态存储   操作 查找 O(1) ,插入和删除O（n） 查找 O(n) ,插入和删除O（1）   缺点 插入删除不便，长度不可以改变 查找速度慢，">
<meta property="og:locale" content="en_US">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/23/171951-630686.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/23/171844-136893.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/19/203103-111988.jpeg">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/23/171839-101678.jpeg">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/23/200119-510473.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202103/05/113954-515448.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/23/203146-263519.jpeg">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/19/153239-688589.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/19/153248-836462.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/19/153259-101399.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202103/11/113055-306258.jpeg">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/19/151817-474283.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/02/103950-168345.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/02/105245-140913.png">
<meta property="og:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/19/153313-134836.png">
<meta property="article:published_time" content="2021-03-11T16:00:00.000Z">
<meta property="article:modified_time" content="2021-03-11T07:19:09.038Z">
<meta property="article:author" content="MOLU">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://gitee.com/moluggg/image/raw/master/img/202102/23/171951-630686.png">


<link rel="icon" href="/img/avatar.jpg">

<link href="/css/style.css?v=1.1.0" rel="stylesheet">

<link href="/css/hl_theme/github.css?v=1.1.0" rel="stylesheet">

<link href="//cdn.jsdelivr.net/npm/animate.css@4.1.0/animate.min.css" rel="stylesheet">

<script src="//cdn.jsdelivr.net/npm/jquery@3.5.1/dist/jquery.min.js"></script>
<script src="/js/titleTip.js?v=1.1.0" ></script>

<script src="//cdn.jsdelivr.net/npm/highlightjs@9.16.2/highlight.pack.min.js"></script>
<script>
    hljs.initHighlightingOnLoad();
</script>

<script src="//cdn.jsdelivr.net/npm/nprogress@0.2.0/nprogress.min.js"></script>



<script src="//cdn.jsdelivr.net/npm/jquery.cookie@1.4.1/jquery.cookie.min.js" ></script>

<script src="/js/iconfont.js?v=1.1.0" ></script>

<meta name="generator" content="Hexo 5.0.0"></head>
<div style="display: none">
  <input class="theme_disqus_on" value="false">
  <input class="theme_preload_comment" value="">
  <input class="theme_blog_path" value="">
  <input id="theme_shortcut" value="true" />
</div>


<body>
<aside class="nav">
    <div class="nav-left">
        <a href="/" class="avatar_target">
    <img class="avatar" src="/img/avatar.jpg" />
</a>
<div class="author">
    <span>MOLU</span>
</div>

<div class="icon">
    
        
        <a title="rss" href="/atom.xml" target="_blank">
            
                <i class="iconfont icon-rss"></i>
            
        </a>
        
    
        
        <a title="github" href="https://github.com/molu-ggg" target="_blank">
            
                <i class="iconfont icon-github"></i>
            
        </a>
        
    
        
    
        
    
        
        <a title="email" href="mailto:2572876783@qq.com" target="_blank">
            
                <i class="iconfont icon-email"></i>
            
        </a>
        
    
</div>




<ul>
    <li><div class="all active" data-rel="All">All<small>(35)</small></div></li>
    
        
            
            <li><div data-rel="AI论文">AI论文<small>(5)</small></div>
                
            </li>
            
        
    
        
            
            <li><div data-rel="专业知识">专业知识<small>(2)</small></div>
                
            </li>
            
        
    
        
            
            <li><div data-rel="数学">数学<small>(2)</small></div>
                
            </li>
            
        
    
        
            
            <li><div data-rel="机器学习">机器学习<small>(12)</small></div>
                
            </li>
            
        
    
        
            
            <li><div data-rel="经典算法">经典算法<small>(10)</small></div>
                
            </li>
            
        
    
</ul>
<div class="left-bottom">
    <div class="menus">
    
    
    
    
    </div>
    <div><a class="about  hasFriend  site_url"  href="/about">About</a><a style="width: 50%"  class="friends">Friends</a></div>
</div>
<input type="hidden" id="yelog_site_posts_number" value="35">

<div style="display: none">
    <span id="busuanzi_value_site_uv"></span>
    <span id="busuanzi_value_site_pv"></span>
</div>

    </div>
    <div class="nav-right">
        <div class="friends-area">
    <div class="friends-title">
        Links
        <i class="iconfont icon-left"></i>
    </div>
    <div class="friends-content">
        <ul>
            
            <li><a target="_blank" href="http://yelog.org/">叶落阁</a></li>
            
        </ul>
    </div>
</div>
        <div class="title-list">
    <div class="right-top">
        <div id="default-panel">
            <i class="iconfont icon-search" data-title="搜索 快捷键 i"></i>
            <div class="right-title">All</div>
            <i class="iconfont icon-file-tree" data-title="切换到大纲视图 快捷键 w"></i>
        </div>
        <div id="search-panel">
            <i class="iconfont icon-left" data-title="返回"></i>
            <input id="local-search-input" />
            <label class="border-line" for="input"></label>
            <i class="iconfont icon-case-sensitive" data-title="大小写敏感"></i>
            <i class="iconfont icon-tag" data-title="标签"></i>
        </div>
        <div id="outline-panel" style="display: none">
            <div class="right-title">大纲</div>
            <i class="iconfont icon-list" data-title="切换到文章列表"></i>
        </div>
    </div>

    <div class="tags-list">
    <input id="tag-search" />
    <div class="tag-wrapper">
        
            <li class="article-tag-list-item">
                <i class="iconfont icon-tag"></i><a>50</a>
            </li>
        
            <li class="article-tag-list-item">
                <i class="iconfont icon-tag"></i><a>AI</a>
            </li>
        
            <li class="article-tag-list-item">
                <i class="iconfont icon-tag"></i><a>数据结构，最短路径，图</a>
            </li>
        
            <li class="article-tag-list-item">
                <i class="iconfont icon-tag"></i><a>机器学习</a>
            </li>
        
            <li class="article-tag-list-item">
                <i class="iconfont icon-tag"></i><a>相似度计算</a>
            </li>
        
    </div>

</div>

    
    <div id="local-search-result">

    </div>
    
    <nav id="title-list-nav">
        
        <a id="top" class="All 经典算法 "
           href="/2020/07/20/b_leetcode/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="leetcode &amp; 蓝桥">leetcode &amp; 蓝桥</span>
            <span class="post-date" title="2020-07-20 00:00:00">2020/07/20</span>
        </a>
        
        <a id="top" class="All AI论文 "
           href="/2020/07/10/d_GAT/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="GAT">GAT</span>
            <span class="post-date" title="2020-07-10 08:50:20">2020/07/10</span>
        </a>
        
        <a  class="All 专业知识 "
           href="/2021/03/12/%E8%AE%A1%E7%AE%97%E6%9C%BA%E7%BB%84%E6%88%90%E5%8E%9F%E7%90%86/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="计算机组成原理">计算机组成原理</span>
            <span class="post-date" title="2021-03-12 00:00:00">2021/03/12</span>
        </a>
        
        <a  class="All 专业知识 "
           href="/2021/03/12/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="数据结构">数据结构</span>
            <span class="post-date" title="2021-03-12 00:00:00">2021/03/12</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2020/12/05/b_%E5%8A%A8%E6%80%81%E8%A7%84%E5%88%92%E4%B8%8E%E8%AE%B0%E5%BF%86%E5%8C%96%E6%90%9C%E7%B4%A2/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="动态规划记忆化">动态规划记忆化</span>
            <span class="post-date" title="2020-12-05 00:00:00">2020/12/05</span>
        </a>
        
        <a  class="All AI论文 "
           href="/2020/07/25/d_300-paperDAT/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="DAT for recommender">DAT for recommender</span>
            <span class="post-date" title="2020-07-25 08:50:20">2020/07/25</span>
        </a>
        
        <a  class="All 数学 "
           href="/2020/07/20/a_%E7%AE%97%E6%B3%95/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="算法总结">算法总结</span>
            <span class="post-date" title="2020-07-20 00:00:00">2020/07/20</span>
        </a>
        
        <a  class="All 数学 "
           href="/2020/07/20/a_%E6%95%B0%E5%AD%A6/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="数学">数学</span>
            <span class="post-date" title="2020-07-20 00:00:00">2020/07/20</span>
        </a>
        
        <a  class="All "
           href="/2020/07/05/%E5%AF%B9%E9%9A%90%E7%A7%98%E7%9A%84%E8%A7%92%E8%90%BD%E7%94%B5%E8%A7%86%E5%89%A7%E7%9A%84%E6%84%9F%E6%82%9F%E4%BB%A5%E5%8F%8A%E8%AE%A4%E8%AF%86/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="对隐秘的角落电视剧的感悟以及认识">对隐秘的角落电视剧的感悟以及认识</span>
            <span class="post-date" title="2020-07-05 00:00:00">2020/07/05</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2020/07/04/a_%E6%A8%A1%E6%9D%BF%E5%BA%93/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="模板库">模板库</span>
            <span class="post-date" title="2020-07-04 08:50:20">2020/07/04</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2020/07/04/a_%E7%99%BE%E5%BA%A6%E6%8A%80%E6%9C%AF%E8%AE%A4%E8%AF%81/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="百度技术认证">百度技术认证</span>
            <span class="post-date" title="2020-07-04 08:50:20">2020/07/04</span>
        </a>
        
        <a  class="All AI论文 "
           href="/2020/07/01/d_GCN%E8%AE%BA%E6%96%87%E8%A7%A3%E8%AF%BB/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="GCN">GCN</span>
            <span class="post-date" title="2020-07-01 08:50:20">2020/07/01</span>
        </a>
        
        <a  class="All AI论文 "
           href="/2020/06/04/d_word2vec_node2vec/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="Word2vec &amp; Node2vec">Word2vec &amp; Node2vec</span>
            <span class="post-date" title="2020-06-04 08:50:20">2020/06/04</span>
        </a>
        
        <a  class="All AI论文 "
           href="/2020/05/05/d_GAN/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="GAN">GAN</span>
            <span class="post-date" title="2020-05-05 08:50:20">2020/05/05</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2020/05/04/d_Deepwalk/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="DEEPWALK">DEEPWALK</span>
            <span class="post-date" title="2020-05-04 08:50:20">2020/05/04</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2020/02/15/b_%E8%B4%AA%E5%BF%83/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="贪心与动态规划">贪心与动态规划</span>
            <span class="post-date" title="2020-02-15 00:00:00">2020/02/15</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2020/02/12/a_2020/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="机器学习零散知识记录">机器学习零散知识记录</span>
            <span class="post-date" title="2020-02-12 00:00:00">2020/02/12</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2020/02/12/b_%E8%83%8C%E5%8C%85%E9%97%AE%E9%A2%98/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="动态规划——背包问题">动态规划——背包问题</span>
            <span class="post-date" title="2020-02-12 00:00:00">2020/02/12</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2020/02/03/b_%E8%93%9D%E6%A1%A5%E6%9D%AF%E7%BB%83%E4%B9%A0%E9%A2%98/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="蓝桥杯">蓝桥杯</span>
            <span class="post-date" title="2020-02-03 00:00:00">2020/02/03</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2020/01/20/c_%E5%86%B3%E7%AD%96%E6%A0%91/"
           data-tag="50"
           data-author="" >
            <span class="post-title" title="决策树总结">决策树总结</span>
            <span class="post-date" title="2020-01-20 00:00:00">2020/01/20</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2020/01/12/b_%E5%85%A8%E6%8E%92%E5%88%97/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="全排列">全排列</span>
            <span class="post-date" title="2020-01-12 00:00:00">2020/01/12</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2019/12/02/b_%E6%9C%89%E7%A9%B7%E8%87%AA%E5%8A%A8%E6%9C%BA/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="有穷自动机">有穷自动机</span>
            <span class="post-date" title="2019-12-02 00:00:00">2019/12/02</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2019/11/20/b_%E9%94%99%E6%8E%92%E5%85%AC%E5%BC%8F/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="错排公式">错排公式</span>
            <span class="post-date" title="2019-11-20 00:00:00">2019/11/20</span>
        </a>
        
        <a  class="All "
           href="/2019/10/29/a_%E5%BC%97%E6%B4%9B%E4%BC%8A%E5%BE%B7/"
           data-tag="数据结构，最短路径，图"
           data-author="" >
            <span class="post-title" title="最短路径之弗洛伊德算法">最短路径之弗洛伊德算法</span>
            <span class="post-date" title="2019-10-29 00:00:00">2019/10/29</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/08/04/c_BPR/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="贝叶斯">贝叶斯</span>
            <span class="post-date" title="2019-08-04 08:50:20">2019/08/04</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/08/02/c_matrix/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="矩阵分解与正则化">矩阵分解与正则化</span>
            <span class="post-date" title="2019-08-02 08:25:59">2019/08/02</span>
        </a>
        
        <a  class="All "
           href="/2019/07/28/c_coordination1/"
           data-tag="AI,机器学习,相似度计算"
           data-author="" >
            <span class="post-title" title="协同过滤-上">协同过滤-上</span>
            <span class="post-date" title="2019-07-28 08:25:59">2019/07/28</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/07/26/c_cnn/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="cnn">cnn</span>
            <span class="post-date" title="2019-07-26 18:54:51">2019/07/26</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/07/22/c_coordination2/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="协同过滤—下">协同过滤—下</span>
            <span class="post-date" title="2019-07-22 18:54:26">2019/07/22</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/07/22/c_kmeans/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="K-means算法">K-means算法</span>
            <span class="post-date" title="2019-07-22 18:53:07">2019/07/22</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/07/21/c_nerve2/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="神经网络-下">神经网络-下</span>
            <span class="post-date" title="2019-07-21 11:37:18">2019/07/21</span>
        </a>
        
        <a  class="All "
           href="/2019/07/20/c_nerve1/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="神经网络—上">神经网络—上</span>
            <span class="post-date" title="2019-07-20 17:29:10">2019/07/20</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/06/22/c_gradient/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="线性回归与梯度下降算法">线性回归与梯度下降算法</span>
            <span class="post-date" title="2019-06-22 08:26:36">2019/06/22</span>
        </a>
        
        <a  class="All 机器学习 "
           href="/2019/06/19/c_flyback/"
           data-tag="AI,机器学习"
           data-author="" >
            <span class="post-title" title="线性回归">线性回归</span>
            <span class="post-date" title="2019-06-19 00:00:00">2019/06/19</span>
        </a>
        
        <a  class="All 经典算法 "
           href="/2019/05/20/b_%E8%A7%86%E9%A2%91%E7%BB%8F%E5%85%B8%E7%AE%97%E6%B3%95/"
           data-tag=""
           data-author="" >
            <span class="post-title" title="简单入门算法">简单入门算法</span>
            <span class="post-date" title="2019-05-20 00:00:00">2019/05/20</span>
        </a>
        
        <div id="no-item-tips">

        </div>
    </nav>
    <div id="outline-list">
    </div>
</div>
    </div>
    <div class="hide-list">
        <div class="semicircle" data-title="切换全屏 快捷键 s">
            <div class="brackets first"><</div>
            <div class="brackets">&gt;</div>
        </div>
    </div>
</aside>
<div id="post">
    <div class="pjax">
        <article id="post-c_决策树" class="article article-type-post" itemscope itemprop="blogPost">
    
        <h1 class="article-title">决策树总结</h1>
    
    <div class="article-meta">
        
        
        
        <span class="book">
            <i class="iconfont icon-category"></i>
            
            
            <a  data-rel="机器学习">机器学习</a>
            
        </span>
        
        
        <span class="tag">
            <i class="iconfont icon-tag"></i>
            
            <a class="color3">50</a>
            
        </span>
        
    </div>
    <div class="article-meta">
        
            Created At : <time class="date" title='Updated At: 2020-08-09 14:28:07'>2020-01-20 00:00</time>
        
    </div>
    <div class="article-meta">
        
        
        <span id="busuanzi_container_page_pv">
            Views 👀 :<span id="busuanzi_value_page_pv">
                <span class="count-comment">
                    <span class="spinner">
                      <div class="cube1"></div>
                      <div class="cube2"></div>
                    </span>
                </span>
            </span>
        </span>
        
        
    </div>
    
    <div class="toc-ref">
    
        <ol class="toc"><li class="toc-item toc-level-1"><a class="toc-link" href="#%E4%BB%80%E4%B9%88%E6%98%AF%E5%86%B3%E7%AD%96%E6%A0%91%EF%BC%9F"><span class="toc-text">什么是决策树？</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E4%B8%BB%E8%A6%81%E6%AD%A5%E9%AA%A4"><span class="toc-text">主要步骤</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#1%E7%89%B9%E5%BE%81%E7%9A%84%E9%80%89%E5%8F%96"><span class="toc-text">1特征的选取</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#2-%E5%86%B3%E7%AD%96%E6%A0%91%E7%9A%84%E7%94%9F%E6%88%90"><span class="toc-text">2 决策树的生成</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#3-%E5%86%B3%E7%AD%96%E6%A0%91%E7%9A%84%E4%BF%AE%E5%89%AA"><span class="toc-text">3 决策树的修剪</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#1%E7%89%B9%E5%BE%81%E7%9A%84%E9%80%89%E5%8F%96-1"><span class="toc-text">1特征的选取</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E7%86%B5"><span class="toc-text">熵</span></a><ol class="toc-child"><li class="toc-item toc-level-4"><a class="toc-link" href="#%E7%86%B5%E7%9A%84%E9%87%8F%E5%8C%96"><span class="toc-text">熵的量化</span></a></li></ol></li><li class="toc-item toc-level-3"><a class="toc-link"><span class="toc-text"> </span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E4%BF%A1%E6%81%AF"><span class="toc-text">信息</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E4%BF%A1%E6%81%AF%E5%A2%9E%E7%9B%8A"><span class="toc-text">信息增益</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%99%AA%E9%9F%B3"><span class="toc-text">噪音</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#2-%E5%86%B3%E7%AD%96%E6%A0%91%E7%9A%84%E7%94%9F%E6%88%90-1"><span class="toc-text">2.决策树的生成</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#3-%E5%86%B3%E7%AD%96%E6%A0%91%E7%9A%84%E4%BF%AE%E5%89%AA-1"><span class="toc-text">3.决策树的修剪</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%89%AA%E6%9E%9D%E5%A4%84%E7%90%86%E2%80%94%E2%80%94%E9%A2%84%E5%A4%84%E7%90%86"><span class="toc-text">剪枝处理——预处理</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%89%AA%E6%9E%9D%E5%A4%84%E7%90%86%E2%80%94%E2%80%94%E5%90%8E%E5%A4%84%E7%90%86"><span class="toc-text">剪枝处理——后处理</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%A6%82%E6%9E%9C%E8%AF%A5%E5%B1%9E%E6%80%A7%EF%BC%8C%E6%98%AF%E8%BF%9E%E7%BB%AD%E5%80%BC%EF%BC%8C%E5%BA%94%E8%AF%A5%E6%80%8E%E4%B9%88%E5%A4%84%E7%90%86%EF%BC%9F"><span class="toc-text">如果该属性，是连续值，应该怎么处理？</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E9%82%A3%E7%BC%BA%E5%A4%B1%E6%95%B0%E6%8D%AE%E6%80%8E%E4%B9%88%E5%8A%9E%EF%BC%9F"><span class="toc-text">那缺失数据怎么办？</span></a></li></ol></li></ol></li></ol>
    
<style>
    .left-col .switch-btn,
    .left-col .switch-area {
        display: none;
    }
    .toc-level-4 i,
    .toc-level-4 ol {
        display: none !important;
    }
</style>
</div>
    
    <div class="article-entry" itemprop="articleBody">
      
        <h1 id="什么是决策树？"><a href="#什么是决策树？" class="headerlink" title="什么是决策树？"></a>什么是决策树？</h1><p>是一种分类与回归算法，是把决策过程用树状图表示出来</p>
<p><strong>有人说决策树其实是定义在特征空间与类空间上的条件概率分布，如何评价？</strong></p>
<h1 id="主要步骤"><a href="#主要步骤" class="headerlink" title="主要步骤"></a>主要步骤</h1><h3 id="1特征的选取"><a href="#1特征的选取" class="headerlink" title="1特征的选取"></a>1特征的选取</h3><h3 id="2-决策树的生成"><a href="#2-决策树的生成" class="headerlink" title="2 决策树的生成"></a>2 决策树的生成</h3><h3 id="3-决策树的修剪"><a href="#3-决策树的修剪" class="headerlink" title="3 决策树的修剪"></a>3 决策树的修剪</h3><h2 id="1特征的选取-1"><a href="#1特征的选取-1" class="headerlink" title="1特征的选取"></a>1特征的选取</h2><p>选取最优的属性进行划分</p>
<p>如何知道谁是最优的属性那？这里我们引出几个概念：</p>
<p>熵，信息增益，</p>
<h3 id="熵"><a href="#熵" class="headerlink" title="熵"></a>熵</h3><p>一种事物的不确定性，不确定性程度越大，熵越大。</p>
<h4 id="熵的量化"><a href="#熵的量化" class="headerlink" title="熵的量化"></a>熵的量化</h4><p>以抛硬币事件为基准，作为衡量标准。</p>
<p>以等可能事件为例：</p>
<p>量化公式为 </p>
<script type="math/tex; mode=display">
n=\log _{2} m</script><p>m为事件发生的情况个数（等可能事件），n就是所求的信息熵，单位为bit</p>
<p>例如：小明等可能做8件事情，那么信息熵为</p>
<script type="math/tex; mode=display">
n=\log _{2} 8=3 bit</script><p>一般来说，我们通过信息来改变事物不确定性的概率，称为概率不等事件</p>
<script type="math/tex; mode=display">
\operatorname{Ent}(D)=\sum_{k=1}^{|\mathcal{Y}|} p_{k} \log _{2} 1/p_{k}</script><script type="math/tex; mode=display">
\operatorname{Ent}(D)=-\sum_{k=1}^{|\mathcal{Y}|} p_{k} \log _{2} p_{k}</script><p>以上两个公式等价，第一个便于理解。pk是某件事物发生第k种可能发生的概率，将所有发生的可能相加。</p>
<h3 id=""><a href="#" class="headerlink" title=" "></a> </h3><h3 id="信息"><a href="#信息" class="headerlink" title="信息"></a>信息</h3><p>消除不确定的事物，调整概率，排除干扰，确定情况</p>
<h3 id="信息增益"><a href="#信息增益" class="headerlink" title="信息增益"></a>信息增益</h3><script type="math/tex; mode=display">
\operatorname{Gain}(D, a)=\operatorname{Ent}(D)-\sum_{v=1}^{V} \frac{\left|D^{v}\right|}{|D|} \operatorname{Ent}\left(D^{v}\right)</script><p>可以这样理解：</p>
<p>Ent（D)是没有信息之前的，后面的公式是得到信息之后的所有可能性的信息熵的和，由于每件事情不等可能，所以要乘以他的比率|D^v|/|D|</p>
<p>比如：好坏瓜，一开始只知道好坏瓜比例 8:9</p>
<script type="math/tex; mode=display">
\operatorname{Ent}(D)=-\sum_{k=1}^{2} p_{k} \log _{2} p_{k}=-\left(\frac{8}{17} \log _{2} \frac{8}{17}+\frac{9}{17} \log _{2} \frac{9}{17}\right)=0.998</script><p>后来，通过信息得，可以通过叶子的颜色区别好坏瓜。黄叶子有6个样例，3好3坏，绿叶子6个样例，4好2坏，深绿5个样例，1好4坏</p>
<script type="math/tex; mode=display">
\begin{aligned}
&\operatorname{Ent}\left(D^{1}\right)=-\left(\frac{3}{6} \log _{2} \frac{3}{6}+\frac{3}{6} \log _{2} \frac{3}{6}\right)=1.000\\
&\begin{array}{l}
{\operatorname{Ent}\left(D^{2}\right)=-\left(\frac{4}{6} \log _{2} \frac{4}{6}+\frac{2}{6} \log _{2} \frac{2}{6}\right)=0.918} \\
{\operatorname{Ent}\left(D^{3}\right)=-\left(\frac{1}{5} \log _{2} \frac{1}{5}+\frac{4}{5} \log _{2} \frac{4}{5}\right)=0.722}
\end{array}
\end{aligned}</script><p>根据样例所占总体样例的总数，可以确定每件事物的可能性：</p>
<script type="math/tex; mode=display">
\begin{array}{l}
{Gain(D,"叶子“)=\operatorname{Ent}(D)-\sum_{v=1}^{3} \frac{\left|D^{v}\right|}{|D|} \operatorname{Ent}\left(D^{v}\right)} \\
{=0.998-\left(\frac{6}{17} \times 1.000+\frac{6}{17} \times 0.918+\frac{5}{17} \times 0.722\right)} \\
{=0.109}
\end{array}</script><p>所以信息增益是0.109，是正数，可以作为信息。</p>
<p>如果信息增益越大，我们约可以把该信息作为首要判断的信息，体现在树中就是靠近根。</p>
<h3 id="噪音"><a href="#噪音" class="headerlink" title="噪音"></a>噪音</h3><p>增加对某件事物的确定的干扰，不能消除对某件不确定性的事物下</p>
<p>数据就是信息和噪音并存。</p>
<h2 id="2-决策树的生成-1"><a href="#2-决策树的生成-1" class="headerlink" title="2.决策树的生成"></a>2.决策树的生成</h2><p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216105922.png" alt=""></p>
<p>通过信息增益，按照每个属性，对信息增益的递减排序，依次使用属性来生成树：</p>
<p>脐部有三种情况，凹陷，微凹，平坦，分类</p>
<p>然后在凹陷属性所包含的数据集中，信息增益最大的是色泽，在此应该以色泽为属性再分叉</p>
<p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216105705.png" alt=""></p>
<h2 id="3-决策树的修剪-1"><a href="#3-决策树的修剪-1" class="headerlink" title="3.决策树的修剪"></a>3.决策树的修剪</h2><h3 id="剪枝处理——预处理"><a href="#剪枝处理——预处理" class="headerlink" title="剪枝处理——预处理"></a>剪枝处理——预处理</h3><p>将分叉过后，正确率下降的或者不变的剪去</p>
<p>如果如下图，只有一个属性判断好坏瓜，那除了平坦都是好瓜，这是根据正反例的比率决定好坏</p>
<p>进一步细分，如果精度下降或者不变，不再细分。</p>
<p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216105544.png" alt=""></p>
<h3 id="剪枝处理——后处理"><a href="#剪枝处理——后处理" class="headerlink" title="剪枝处理——后处理"></a>剪枝处理——后处理</h3><p>分完过后，将细分过后精度下降的剪去即可。</p>
<p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216111034.png" alt=""></p>
<h3 id="如果该属性，是连续值，应该怎么处理？"><a href="#如果该属性，是连续值，应该怎么处理？" class="headerlink" title="如果该属性，是连续值，应该怎么处理？"></a>如果该属性，是连续值，应该怎么处理？</h3><p>那就是不停的尝试阈值。一般连续值会给你多个数据，将这些数据从小到大排列起来,比如浓度:a1,a2,a3,a4,a5，然后分别的阈值是a1+a2/2,a2+a3/2………然后算出信息增益增大的那个数</p>
<h3 id="那缺失数据怎么办？"><a href="#那缺失数据怎么办？" class="headerlink" title="那缺失数据怎么办？"></a>那缺失数据怎么办？</h3><p>就是先将没有缺失的数据分在一起，记录个数n，进行计算信息熵与各个属性的信息增益m，若一共有N个数据，最后的该属性的信息增益是：n/N*m;</p>
<p>比如：</p>
<p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216115151.png" alt=""></p>
<p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216115200.png" alt=""></p>
<p>多变量决策树：</p>
<p>两个属性，按照比率相加。两个属性成某一种关系而存在，尤其是连续值的属性。</p>
<p>比如，人的BMI值，就是跟任瑟身高与体重有关，这时候，只是按部就班一步步探讨，开支会很大，那我们就构造类似函数一样，函数值作为一个属性，参与进去。</p>
<p>如：两个属性：</p>
<p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216120038.png" alt=""></p>
<p>一下将两个属性联系在一起：</p>
<p><img src="https://moluggg.oss-cn-qingdao.aliyuncs.com/img/20200216120118.png" alt=""></p>

      
       <hr><span style="font-style: italic;color: gray;"> 转载请注明来源，欢迎对文章中的引用来源进行考证，欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论，也可以邮件至 2572876783@qq.com </span>
    </div>
</article>


<p>
    <a  class="dashang" onclick="dashangToggle()">💰</a>
</p>






    
        <!-- MathJax配置，可通过单美元符号书写行内公式等 -->
<script type="text/x-mathjax-config">
    MathJax.Hub.Config({
    "HTML-CSS": {
        preferredFont: "TeX",
        availableFonts: ["STIX","TeX"],
        linebreaks: { automatic:true },
        EqnChunk: (MathJax.Hub.Browser.isMobile ? 10 : 50)
    },
    tex2jax: {
        inlineMath: [ ["$", "$"], ["\\(","\\)"] ],
        processEscapes: true,
        ignoreClass: "tex2jax_ignore|dno",
        skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'code']
    },
    TeX: {
        equationNumbers: { autoNumber: "AMS" },
        noUndefined: { attributes: { mathcolor: "red", mathbackground: "#FFEEEE", mathsize: "90%" } },
        Macros: { href: "{}" }
    },
    messageStyle: "none"
    });
</script>
<!-- 给MathJax元素添加has-jax class -->
<script type="text/x-mathjax-config">
    MathJax.Hub.Queue(function() {
        var all = MathJax.Hub.getAllJax(), i;
        for(i=0; i < all.length; i += 1) {
            all[i].SourceElement().parentNode.className += ' has-jax';
        }
    });
</script>
<!-- 通过连接CDN加载MathJax的js代码 -->
<script type="text/javascript" async
        src="//cdn.jsdelivr.net/npm/mathjax@2.7.8/unpacked/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
<input type="hidden" id="MathJax-js"
        value="//cdn.jsdelivr.net/npm/mathjax@2.7.8/unpacked/MathJax.js?config=TeX-MML-AM_CHTML">
</input>
    




    </div>
    <div class="copyright">
        <p class="footer-entry">
    ©2016-2020 MOLU
</p>
<p class="footer-entry">Built with <a href="https://hexo.io/" target="_blank">Hexo</a> and <a href="https://github.com/yelog/hexo-theme-3-hexo" target="_blank">3-hexo</a> theme</p>

    </div>
    <div class="full-toc">
        <button class="full" data-title="切换全屏 快捷键 s"><span class="min "></span></button>
<a class="" id="rocket" ></a>

    </div>
</div>

<div class="hide_box" onclick="dashangToggle()"></div>
<div class="shang_box">
    <a class="shang_close"  onclick="dashangToggle()">×</a>
    <div class="shang_tit">
        <p>Help us with donation</p>
    </div>
    <div class="shang_payimg">
        <div class="pay_img">
            <img src="/img/alipay.jpg" class="alipay" title="扫码支持">
            <img src="/img/weixin.jpg" class="weixin" title="扫码支持">
        </div>
    </div>
    <div class="shang_payselect">
        <span><label><input type="radio" name="pay" checked value="alipay">alipay</label></span><span><label><input type="radio" name="pay" value="weixin">weixin</label></span>
    </div>
</div>


</body>
<script src="/js/jquery.pjax.js?v=1.1.0" ></script>

<script src="/js/script.js?v=1.1.0" ></script>
<script>
    var img_resize = 'default';
    function initArticle() {
        /*渲染对应的表格样式*/
        
            $("#post .pjax table").addClass("green_title");
        

        /*渲染打赏样式*/
        
        $("input[name=pay]").on("click", function () {
            if($("input[name=pay]:checked").val()=="weixin"){
                $(".shang_box .shang_payimg .pay_img").addClass("weixin_img");
            } else {
                $(".shang_box .shang_payimg .pay_img").removeClass("weixin_img");
            }
        })
        

        /*高亮代码块行号*/
        

        /*访问数量*/
        
        $.getScript("//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js");
        

        /*代码高亮，行号对齐*/
        $('.pre-numbering').css('line-height',$('.has-numbering').css('line-height'));

        
        
    }

    /*打赏页面隐藏与展示*/
    
    function dashangToggle() {
        $(".shang_box").fadeToggle();
        $(".hide_box").fadeToggle();
    }
    

</script>

<!--加入行号的高亮代码块样式-->

<!--自定义样式设置-->
<style>
    
    
    .nav {
        width: 542px;
    }
    .nav.fullscreen {
        margin-left: -542px;
    }
    .nav-left {
        width: 120px;
    }
    
    
    @media screen and (max-width: 1468px) {
        .nav {
            width: 492px;
        }
        .nav.fullscreen {
            margin-left: -492px;
        }
        .nav-left {
            width: 100px;
        }
    }
    
    
    @media screen and (max-width: 1024px) {
        .nav {
            width: 492px;
            margin-left: -492px
        }
        .nav.fullscreen {
            margin-left: 0;
        }
    }
    
    @media screen and (max-width: 426px) {
        .nav {
            width: 100%;
        }
        .nav-left {
            width: 100%;
        }
    }
    
    
    .nav-right .title-list nav a .post-title, .nav-right .title-list #local-search-result a .post-title {
        color: #383636;
    }
    
    
    .nav-right .title-list nav a .post-date, .nav-right .title-list #local-search-result a .post-date {
        color: #5e5e5f;
    }
    
    
    .nav-right nav a.hover, #local-search-result a.hover{
        background-color: #e2e0e0;
    }
    
    

    /*列表样式*/
    

    /* 背景图样式 */
    
    


    /*引用块样式*/
    

    /*文章列表背景图*/
    

    
</style>







</html>
